* Collapse hourly actions to get outcome variables

********************************************************************************
*** Stage 1: outcomes that need nothing beyond the recorded insurance actions
	*(days purchased, hour of cancellation, bundle activity, etc.)

use "data/cleaned/hugo/All balance actions.dta", clear

* Build a one-row-per-id lookup with each cancelled user's latest suspension
* timestamp, then attach it to every action row.
preserve
	use "data/inputs/hugo/new_coverage_activity.dta", clear
	* membership suspensions only, leaving out the ones recorded as end-of-study
	keep if type == "suspend_membership"
	drop if inlist(reason, "END OF STUDY", "End of study")
	* retain the most recent suspension per user
	bysort id: egen latest_suspend = max(timestamp)
	keep if timestamp == latest_suspend
	drop latest_suspend
	sort id timestamp
	rename timestamp time_ended
	* fails if two suspensions of one user tie on the latest timestamp
	isid id
	tempfile cancel_times
	save `cancel_times'
restore

* users without a qualifying suspension stay in the data with time_ended missing
merge m:1 id using `cancel_times', keep(1 3) nogen

* Hour of cancellation measured from firstday; negative values are set to zero
* (the divisor suggests the timestamps are in milliseconds -- TODO confirm)
gen cancel_hr_raw = round((time_ended - firstday)/(1000*60*60*24)*24,1)
* missing values are never < 0, so they are left untouched here
replace cancel_hr_raw = 0 if cancel_hr_raw < 0
bysort id: egen hour_canceled = median(cancel_hr_raw)
drop cancel_hr_raw

*hours elapsed between user creation and bind
gen hourfromcrtobind = round((timestamp_bind-timestamp_usercreated)/(1000*60*60),1)

*hour in which each action took place
*(timesincestart is presumably measured in days -- TODO confirm)
gen hour = round(timesincestart*24,1)
assert hour>=0 if type=="pay_coverage"
*only the initial bind record is allowed to sit before hour zero
keep if hour>=0 | source=="initial_bind"
*users who never bound are pinned to hour zero
replace hour = 0 if bound==0

*trim the data to the variables used further down
keep id stripecustomerid cohort daily bundle bound ///
	firstday twomonth twomonthcr threemonth threemonthcr ///
	hour hour_canceled hourfromcrtobind initialbinddays ///
	source subtype type ///
	reservebalanceindays balanceindays amount amount_paid dayrate basedayrate ///
	timestamp_usercreated timestamp_bind ///
	paydays paydays* payments payment* bundled*

*flag coverage payments and copy the balances recorded on each action row
gen cov = (type=="pay_coverage")
gen resbal = reservebalanceindays
gen regbal = balanceindays

*discard actions recorded after the cancellation hour
*(rows of users with a missing hour_canceled are all kept)
keep if hour<=hour_canceled

*** bundle purchases as a share of all payments
assert !mi(paydays) if type=="add_balance"
*a row counts as a payment when paydays is filled in
gen ispayment = !mi(paydays)
*bundle = a 14- or 30-day purchase; left missing on non-payment rows
gen isbundle = inlist(paydays, 14,30) if ispayment
*replace the incoming payments variable with a per-user count of payment rows
drop payments
bysort id: egen payments = sum(ispayment)

*** user-level bundling indicators
*over all payments
by id: egen everbundler = max(isbundle)
gen neverbundler = 1-everbundler
by id: egen evernonbundler = max(isbundle==0)

*same indicators, ignoring rows whose source is the initial bind
by id: egen everbundlernofirst = max(isbundle==1 & source!="initial_bind")
gen neverbundlernofirst = 1-everbundlernofirst
by id: egen evernonbundlernofirst = max(isbundle==0 & source!="initial_bind")

*indicators are blanked for users with daily==0 or bound==0
foreach flag of varlist *bundler {
	replace `flag' = . if bound==0 | daily==0
}

*the *nofirst versions are additionally blanked for users with exactly one payment
foreach flag of varlist *nofirst {
	replace `flag' = . if bound==0 | daily==0 | payments==1
}

*tabulate once per user (first row of each id)
bysort id: gen nval=_n
tab everbundler if nval==1
tab evernonbundler if nval==1
tab evernonbundlernofirst if nval==1


*** estimate total purchases
*** get amount paid
*fall back on the raw amount when the paid amount was not recorded
replace amount_paid = amount if mi(amount_paid) & !mi(amount)

*total of all purchases: per-id sum of amount_paid over add_balance rows only
*(the 0/1 indicator zeroes out every other action type; the original line had
* an unmatched opening parenthesis and could not be parsed)
bysort id: egen totpurchased = sum(amount_paid * (type == "add_balance"))
*express the total in days at each row's dayrate, rounded to whole days
gen daypurch = round(totpurchased / dayrate, 1)

*** stash one row per user of purchase/usage variables; it is merged back on
*** after the hourly panel has been filled in
preserve
	keep id stripecustomerid cohort bundle daypurch *bundler *bundlernofirst initialbinddays ///
		timestamp_usercreated timestamp_bind bound basedayrate ///
		paydays payments daily payment* paydays* bundled* payments hour_canceled
	*payment rows, plus every row of users with bound==0
	keep if !mi(paydays) | bound==0
	*number the kept rows within user and spread paydays across columns
	bysort id: gen nval=_n
	reshape wide paydays, i(id) j(nval)

	*counters for each purchase length, defined only when daily==1
	foreach len in 3 7 14 30 {
		gen num`len' = 0 if daily == 1
	}

	*tally purchases by length; the first payment column is left out of the tally
	foreach pd of varlist paydays* {
		if "`pd'" == "paydays1" {
			continue
		}
		replace num3 = num3 + 1 if inlist(`pd', 3, 2)
		replace num7 = num7 + 1 if `pd' == 7
		replace num14 = num14 + 1 if inlist(`pd', 14, 12)
		replace num30 = num30 + 1 if inlist(`pd', 30, 24)
	}
	*the tallies plus the skipped first payment must add up to the payment count
	assert num3+num7+num14+num30+1 == payments if bound == 1 & daily == 1
	duplicates drop
	drop bound daily
	isid id
	tempfile userdat
	save `userdat'
restore


********************************************************************************
*** hour-by-hour insurance activity
*fill in the hours from usercreation to three months after bind using tsfill
*this section reduces the action data to one row per id-hour and adds two
*synthetic boundary rows per user (first and last hour) for the later fill

*sometimes multiple things happen in the same hour (turning on coverage and adding balance)
bysort id hour: gen nobs=_N
tab nobs
*collapse at the hourly level, keeping track of those hours where coverage is used or balance added
*check first: every row in an hour with several actions has resbal different from zero
assert resbal!=0 if nobs>1
*(max) keeps cov==1 if any action in the hour was a coverage payment, and the
*largest resbal/regbal recorded in that hour; nobs is not carried through the collapse
collapse (max) cov resbal regbal, by(id daily bound twomonth twomonthcr threemonth threemonthcr ///
	firstday hour_canceled hourfromcrtobind hour)

*create a separate dataset that has id and hour of threemonth limit
*that way, when tsfilling, this last day gets filled in
*same with first hour (hour of user creation relative to bind)
*idmax: one row per id holding only id, firstday and the hour of the threemonth limit
*(the divisor suggests timestamps are in milliseconds -- TODO confirm)
preserve
	keep id threemonth firstday
	duplicates drop
	isid id
	gen hour = round((threemonth-firstday)/(1000*60*60),1)
	drop threemonth
	tempfile idmax
	save `idmax'
restore
*idmin: one row per id at hour = -hourfromcrtobind, or hour 0 when that is missing
*these rows carry no firstday, threemonth, cov, resbal or regbal
preserve
	keep id daily bound hour_canceled hourfromcrtobind twomonth twomonthcr threemonthcr
	drop if mi(bound)
	duplicates drop
	isid id
	gen hour = -1*hourfromcrtobind
	replace hour = 0 if mi(hourfromcrtobind)
	tempfile idmin
	save `idmin'
restore
*stack both sets of boundary rows under the hourly data
append using `idmax'
append using `idmin'
*first and last hour per user across real and synthetic rows
bysort id: egen minhour = min(hour)
assert minhour==-1*hourfromcrtobind if !mi(hourfromcrtobind)
bysort id: egen maxhour = max(hour)
*NOTE(review): maxhour is in hours, so 90 is a very loose lower bound -- confirm
*whether 90*24 was intended
assert maxhour>=90
*sometimes there is an action that takes place in the last hour, so it gets duplicated
	*remove the artificially-added duplicate
*the idmax row is recognised by its missing bound, the idmin row by its missing firstday
bysort id hour: gen nobs=_N
drop if hour==maxhour & mi(bound) & nobs==2
drop if hour==0 & mi(firstday) & nobs==2

*rows without an id cannot be placed in the panel
drop if mi(id)
*numeric version of id for use as the panel variable
encode id, gen(idfac)

*add a row for every hour between each user's first and last hour
tsset idfac hour
tsfill


*the last hour should land within one hour of the recorded threemonth timestamp
gen threemonthhour = firstday + maxhour*1000*60*60
assert abs(threemonth - threemonthhour) < 1000*60*60 if !mi(threemonth)

***copy the user-level values onto every hour, including the rows tsfill created
sort idfac
foreach uservar of varlist id daily bound firstday ///
	twomonth twomonthcr threemonthcr hour_canceled hourfromcrtobind {
	*take the within-user mode and swap it in under the original name
	by idfac: egen `uservar'all = mode(`uservar')
	drop `uservar'
	rename `uservar'all `uservar'
}

*flag hours falling before the two- and three-month cut-offs
*timestamp of each hour, kept in double precision
tempvar hourstamp
gen double `hourstamp' = firstday + hour*(60*60*1000)
gen istwomonth = `hourstamp' < twomonth & hour>=0
assert !mi(istwomonth)
gen istwomonthcr = `hourstamp' < twomonthcr
assert !mi(istwomonthcr)
gen isthreemonthcr = `hourstamp' < threemonthcr
assert !mi(isthreemonthcr)
drop `hourstamp'

*hour shifted by hourfromcrtobind; users with bound==0 keep hour unchanged
gen hourcr = cond(bound==0, hour, hour+hourfromcrtobind)
assert hourfromcrtobind>=0 if !mi(hourfromcrtobind)
assert !mi(hourcr) if bound==1

sort id hour

	
********************************************************************************
*** fill in hours of having reserve and having coverage

*every carry-forward, running sum and neighbouring-row lookup in this section
*walks forward in time within a user. The within-id order is therefore spelled
*out as (hour) instead of relying on whatever order an earlier sort left behind:
*sort does not promise the relative order of tied rows unless stable is given.

*** has reserve
gen hasres = .
replace hasres = 1 if resbal!=0 & !mi(resbal)
replace hasres = 0 if resbal==0 | bound==0
*start each user's series at 0 when the first hour carries no information
replace hasres = 0 if mi(hasres) & hour==minhour
*user has reserve until they don't, fill in those missings
bysort id (hour): carryforward hasres, replace
assert hasres==0 if bound==0

*** has a regular balance
gen hasreg = .
replace hasreg = 1 if regbal!=0 & !mi(regbal)
replace hasreg = 0 if regbal==0 | bound==0
replace hasreg = 0 if mi(hasreg) & hour==minhour
*user has regular balance until they don't
bysort id (hour): carryforward hasreg, replace
assert hasreg==0 if bound==0

*last coverage and action variables
*lastcov: hour of the most recent coverage payment
*lastact: hour of the most recent coverage payment or row with a nonzero reserve balance
gen lastcov = hour if cov==1
gen lastact = hour if cov==1 | (resbal!=0 & !mi(resbal))
bysort id (hour): carryforward resbal, replace
bysort id (hour): carryforward regbal, replace
bysort id (hour): carryforward lastcov, replace
bysort id (hour): carryforward lastact, replace
bysort id (hour): egen finalcov = max(lastcov)
bysort id (hour): egen finalact = max(lastact)
*covered during the 24 hours starting at a coverage payment
gen hascov = (hour-lastcov)<24 & !mi(lastcov)
assert hascov==0 if bound==0

*fill in coverage "blips"
*if coverage in previous and next hour, fill in that hour as having coverage
*sometimes the automatic coverage doesn't start right at 24 hours
*the [_n-1]/[_n+1] lookups need rows in id-hour order, so sort explicitly
sort id hour
replace hascov = 1 if id==id[_n-1] & id==id[_n+1] & ///
	hascov==0 & hascov[_n-1]==1 & hascov[_n+1]==1

*indicate where there is balance and no coverage
gen balnocov = regbal!=0 & !mi(regbal) & hascov==0
replace balnocov = 0 if hour>hour_canceled & !mi(hour_canceled)
replace balnocov=. if daily==0
*a new spell starts at hour 0 and whenever coverage resumes while reserve is held
gen spell1 = (hasres==1 & hascov==1 & hasres[_n-1]==1 & hascov[_n-1]==0) | hour==0
*running count of spell starts within user, accumulated in hour order
bysort id (hour): gen spell = sum(spell1)
*hours with a balance but no coverage, totalled per spell
bysort id spell: egen balnocovlength = sum(balnocov)

*confirm that no user has a positive regular balance and no coverage for more than 10 days
assert balnocovlength<10*24 if !mi(balnocovlength) & bound==1 & daily==1

*consider user to have a balance for the time that they are using that last coverage
replace hasres = 1 if hascov==1
*still has regular balance when coverage is occurring and they still have 10 reserve days
replace hasreg = 1 if hascov==1 & inlist(resbal, 10)

*** coverage that occurs with regular balance
gen hascovreg = hascov==1 & hasreg==1

*last hour in each user's panel
bysort id: egen lasthour = max(hour)
*1 if, on any row of the user, lastact equals the hour of the final coverage payment
by id: egen lastadd = max(lastact==finalcov)

*replace for non-daily, cancelled users
*users with daily==0 and bound==1 get every flag switched on ...
replace hasres = 1 if daily==0 & bound==1
replace hasreg = 1 if daily==0 & bound==1
replace hascov = 1 if daily==0 & bound==1
replace hascovreg = 1 if daily==0 & bound==1

*... then, for every user with a cancellation hour, flags are 0 from that hour on
*and missing from 24 hours after it (rows with hour_canceled missing are untouched)
replace hasres = 0 if hour>=hour_canceled
replace hasreg = 0 if hour>=hour_canceled
replace hascov = 0 if hour>=hour_canceled
replace hascovreg = 0 if hour>=hour_canceled
replace hasres = . if hour>=hour_canceled+24
replace hascov = . if hour>=hour_canceled+24
replace hasreg = . if hour>=hour_canceled+24
replace hascovreg = . if hour>=hour_canceled+24

*time between last coverage and end of study
*in days, from the final coverage payment to the user's last hour
gen finalcovtoend = (lasthour-finalcov)/24
replace finalcovtoend = 0 if daily==0 & bound==1

*isthreemonthcr is written out in full: the shorter isthreemonth is not a variable
*and only resolved through variable-name abbreviation, which fails under
*set varabbrev off
keep id daily bound idfac lasthour hour hourcr hascov hasres hasreg hascovreg ///
	istwomonth istwomonthcr isthreemonthcr ///
	finalcovtoend resbal lastact finalact lastadd balnocov hour_canceled hourfromcrtobind
isid id hour

*attach the user-level purchase variables; every id must match on both sides
merge m:1 id using `userdat', nogen assert(3)

*hours expressed in days
gen day = hour/24
gen daycr = hourcr/24


********************************************************************************
*** user-level roll-up used only for sanity checks (discarded by restore)

preserve
	keep if hour>0
	gen fullday = floor(day)
	bysort id: egen lastday = max(fullday)
	*number of hourly rows falling in each user-day
	bysort id fullday: gen hrs_in_day = _N
	*single-hour days are expected to be the trailing hour of the last day only;
	*the expected count is specific to this dataset
	count if hrs_in_day==1
	assert r(N)==1537
	keep if hrs_in_day!=1
	assert hrs_in_day>=23

	*share of each day spent with reserve, regular balance and coverage
	collapse (mean) hasres hasreg hascov, by(id bound daily bundle fullday ///
		finalcovtoend lasthour lastadd lastday hour_canceled daypurch)

	gen hourlength = lasthour/24
	*add the daily shares up to user-level totals in days
	collapse (sum) reserve=hasres regerve=hasreg coverage=hascov, by(id ///
		finalcovtoend hourlength lastadd daypurch)

	*neither coverage nor regular-balance time may exceed reserve time
	assert coverage <= reserve
	assert regerve <= reserve
restore

rename daypurch dayspurchased90

save "data/cleaned/hugo/hourly hugo action", replace

